{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 40,
   "id": "d0cdf91e",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import requests\n",
    "import json\n",
    "from typing import List\n",
    "from dotenv import load_dotenv\n",
    "from bs4 import BeautifulSoup\n",
    "from IPython.display import Markdown,display,update_display\n",
    "from openai import OpenAI"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load variables from a local .env file into the process environment.\n",
    "load_dotenv()\n",
    "\n",
    "# The OpenAI client looks for OPENAI_API_KEY. Environment lookups are case-sensitive on\n",
    "# POSIX systems, so the mixed-case spelling is only kept as a fallback for existing .env files.\n",
    "api_key = os.getenv('OPENAI_API_KEY') or os.getenv('OpenAI_API_KEY')\n",
    "if not api_key:\n",
    "    print(\"Warning: no OpenAI API key found - set OPENAI_API_KEY in your .env file\")\n",
    "\n",
    "# Chat model used for both the link-selection and brochure-writing calls.\n",
    "model = 'gpt-4o-mini'\n",
    "\n",
    "# Hand the key over explicitly; when it is None the client falls back to its own\n",
    "# OPENAI_API_KEY environment lookup, exactly as a bare OpenAI() would.\n",
    "openai = OpenAI(api_key=api_key)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "id": "3c7e9213",
   "metadata": {},
   "outputs": [],
   "source": [
    "class Website:\n",
    "    \"\"\"\n",
    "    A utility class to represent a Website that we have scraped, now with links\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, url):\n",
    "        self.url = url\n",
    "        response = requests.get(url)\n",
    "        self.body = response.content\n",
    "        soup = BeautifulSoup(self.body, 'html.parser')\n",
    "        self.title = soup.title.string if soup.title else \"No title found\"\n",
    "        if soup.body:\n",
    "            for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n",
    "                irrelevant.decompose()\n",
    "            self.text = soup.body.get_text(separator=\"\\n\", strip=True)\n",
    "        else:\n",
    "            self.text = \"\"\n",
    "        links = [link.get('href') for link in soup.find_all('a')]\n",
    "        self.links = [link for link in links if link]   \n",
    "\n",
    "    def get_contents(self):\n",
    "        return f\"Webpage Title:\\n{self.title}\\nWebpage Contents:\\n{self.text}\\n\\n\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "id": "0287acd3",
   "metadata": {},
   "outputs": [],
   "source": [
    "# System prompt for the link-selection call: two worked input/output examples show the\n",
    "# model the JSON shape ({\"links\": [{\"type\": ..., \"url\": ...}]}) it is expected to reply with.\n",
    "link_system_prompt = \"\"\"You are provided with a list of links found on a webpage.\n",
    "You must decide which links would be most relevant to include in a brochure about the company,\n",
    "such as links to an About page, Company page, or Careers/Jobs pages.\n",
    "\n",
    "Respond in JSON format like this:\n",
    "Example 1:\n",
    "Input:\n",
    "[\n",
    "    \"https://example.com\",\n",
    "    \"https://example.com/about\",\n",
    "    \"https://example.com/contact\",\n",
    "    \"https://example.com/careers\"\n",
    "]\n",
    "\n",
    "Output:\n",
    "{\n",
    "    \"links\": [\n",
    "        {\"type\": \"about page\", \"url\": \"https://example.com/about\"},\n",
    "        {\"type\": \"careers page\", \"url\": \"https://example.com/careers\"}\n",
    "    ]\n",
    "}\n",
    "\n",
    "Example 2:\n",
    "Input:\n",
    "[\n",
    "    \"https://anothercompany.org/home\",\n",
    "    \"https://anothercompany.org/team\",\n",
    "    \"https://anothercompany.org/jobs\",\n",
    "    \"https://anothercompany.org/blog\"\n",
    "]\n",
    "\n",
    "Output:\n",
    "{\n",
    "    \"links\": [\n",
    "        {\"type\": \"about page\", \"url\": \"https://anothercompany.org/team\"},\n",
    "        {\"type\": \"careers page\", \"url\": \"https://anothercompany.org/jobs\"}\n",
    "    ]\n",
    "}\n",
    "\n",
    "Now analyze the following list of links:\n",
    "\"\"\"\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "id": "c968b1fb",
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_links_user_prompt(website):\n",
    "    user_prompt = f\"Here is the list of links on the website of {website.url} - \"\n",
    "    user_prompt += \"please decide which of these are relevant web links for a brochure about the company, respond with the full https URL in JSON format. \\\n",
    "Do not include Terms of Service, Privacy, email links.\\n\"\n",
    "    user_prompt += \"Links (some might be relative links):\\n\"\n",
    "    user_prompt += \"\\n\".join(website.links)\n",
    "    return user_prompt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "id": "a03b9150",
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_links(url):\n",
    "    \"\"\"\n",
    "    Ask the model which links on the page at `url` belong in a company brochure.\n",
    "\n",
    "    Args:\n",
    "        url: Address of the page whose links are inspected.\n",
    "\n",
    "    Returns:\n",
    "        The model's JSON reply parsed into Python objects; the system prompt asks\n",
    "        for a dict shaped like {\"links\": [{\"type\": ..., \"url\": ...}, ...]}.\n",
    "    \"\"\"\n",
    "    page = Website(url)\n",
    "    chat_messages = [\n",
    "        {\"role\": \"system\", \"content\": link_system_prompt},\n",
    "        {\"role\": \"user\", \"content\": get_links_user_prompt(page)},\n",
    "    ]\n",
    "    reply = openai.chat.completions.create(\n",
    "        model=model,\n",
    "        messages=chat_messages,\n",
    "        # JSON mode: the reply body is a single JSON object.\n",
    "        response_format={\"type\": \"json_object\"},\n",
    "    )\n",
    "    return json.loads(reply.choices[0].message.content)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "id": "c0522b62",
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_all_details(url):\n",
    "    \"\"\"\n",
    "    Collect the text of the landing page plus every page the model judged relevant.\n",
    "\n",
    "    Args:\n",
    "        url: Landing page of the company website.\n",
    "\n",
    "    Returns:\n",
    "        One string with a \"Landing page:\" section followed by a section per\n",
    "        selected link, each headed by the link's type.\n",
    "    \"\"\"\n",
    "    result = \"Landing page:\\n\"\n",
    "    result += Website(url).get_contents()\n",
    "    links = get_links(url)\n",
    "    print(\"Found links:\", links)\n",
    "\n",
    "    # The reply is model-generated JSON, so tolerate a missing \"links\" key\n",
    "    # or malformed entries instead of raising KeyError.\n",
    "    for link in links.get(\"links\") or []:\n",
    "        if not isinstance(link, dict) or not link.get(\"url\"):\n",
    "            continue\n",
    "        link_url = link[\"url\"]\n",
    "        try:\n",
    "            page = Website(link_url)\n",
    "        except requests.RequestException as error:\n",
    "            # One unreachable or invalid (e.g. relative) URL should not abort the whole brochure.\n",
    "            print(f\"Skipping {link_url}: {error}\")\n",
    "            continue\n",
    "        page_type = link.get(\"type\", \"page\")\n",
    "        result += f\"\\n\\n{page_type}\\n\"\n",
    "        result += page.get_contents()\n",
    "    return result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "id": "edae03dd",
   "metadata": {},
   "outputs": [],
   "source": [
    "get_brochure_system_prompt = \"You are an assistant that analyzes the contents of several relevant pages from a company website \\\n",
    "and creates a short humorous, entertaining, jokey brochure about the company for prospective customers, investors and recruits. Respond in markdown.\\\n",
    "Include details of company culture, customers and careers/jobs if you have the information.\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "id": "2397e73e",
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_brochure_user_prompt(company_name, url, max_chars=5_000):\n",
    "    \"\"\"\n",
    "    Build the user message for the brochure call from the scraped site contents.\n",
    "\n",
    "    Args:\n",
    "        company_name: Name of the company, quoted in the first line of the prompt.\n",
    "        url: Landing page passed on to get_all_details.\n",
    "        max_chars: Upper bound on the prompt length in characters; pass None to\n",
    "            send the full text.\n",
    "\n",
    "    Returns:\n",
    "        The prompt, cut to at most `max_chars` characters.\n",
    "    \"\"\"\n",
    "    user_prompt = f\"You are looking at a company called: {company_name}\\n\"\n",
    "    user_prompt += \"Here are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown.\\n\"\n",
    "    user_prompt += get_all_details(url)\n",
    "    # Slicing with None leaves the string untouched, so None disables truncation.\n",
    "    return user_prompt[:max_chars]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "id": "e99c46e1",
   "metadata": {},
   "outputs": [],
   "source": [
    "def create_brochure(company_name, url):\n",
    "    \"\"\"\n",
    "    Generate a brochure for the company, render it in the notebook, and return it.\n",
    "\n",
    "    Args:\n",
    "        company_name: Name of the company the brochure is about.\n",
    "        url: Landing page of the company website.\n",
    "\n",
    "    Returns:\n",
    "        The content of the model's reply.\n",
    "    \"\"\"\n",
    "    conversation = [\n",
    "        {\"role\": \"system\", \"content\": get_brochure_system_prompt},\n",
    "        {\"role\": \"user\", \"content\": get_brochure_user_prompt(company_name, url)},\n",
    "    ]\n",
    "    completion = openai.chat.completions.create(model=model, messages=conversation)\n",
    "    brochure_markdown = completion.choices[0].message.content\n",
    "    display(Markdown(brochure_markdown))\n",
    "    return brochure_markdown\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "id": "f5bbe077",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Found links: {'links': [{'type': 'company page', 'url': 'https://www.anthropic.com/company'}, {'type': 'about page', 'url': 'https://www.anthropic.com/team'}, {'type': 'careers page', 'url': 'https://www.anthropic.com/careers'}, {'type': 'research page', 'url': 'https://www.anthropic.com/research'}, {'type': 'learn page', 'url': 'https://www.anthropic.com/learn'}]}\n"
     ]
    },
    {
     "data": {
      "text/markdown": [
       "# 🦾 Welcome to Anthropic: Where AI Meets Adventure! 🚀\n",
       "\n",
       "## About Us\n",
       "At Anthropic, we don’t just build AI; we build **Claude**! That’s right, not just any regular AI, but the crème de la crème, written with an extra sprinkle of safety! Claude isn’t just intelligent; he’s a poet (check out Claude 3.7 Sonnet!). We’re all about putting humanity first and making sure our AI knows that, no matter how smart it gets, *we’re still in charge*!\n",
       "\n",
       "## Our Culture 🌍\n",
       "Imagine a workplace where *discussions about AI* aren't just about who will take over the world – they’re about how we can use AI to make life better. We take bold steps forward but also know when to pause, ponder and ensure we don't go rogue. We might not have a crystal ball, but we have a *really good AI* for that!\n",
       "\n",
       "- **Transparency:** We're as clear as the skies over a freshly vaccuumed office.\n",
       "- **Teamwork:** Just like Claude helping you code, we help each other out!\n",
       "- **Intellectual Playground:** We provide a space where brainwaves fly like confetti.\n",
       "  \n",
       "## Customers 🎉\n",
       "From savvy developers to curious educators, and even intimidating enterprises, everyone is talking to Claude! Our customers are a mix of brilliant minds using our API to build magical experiences and tools that maybe one day, won't require a human babysitter (kidding!). Here's what some of our customers are saying:\n",
       "\n",
       "> \"Claude is like a comic book superhero—fighting information injustice one query at a time!\"  \n",
       "> – Satisfied Developer\n",
       "\n",
       "## Careers: Join the Adventure! 💼\n",
       "Are you an innovator, a thinker, or someone who just likes playing chess with algorithms? At Anthropic, we’re always on the lookout for talented individuals ready to shape the future of AI. \n",
       "\n",
       "- **Open Roles:** Want to help us build the future of safe AI? We've got plenty of roles, and yes, they include working with Claude… and maybe some snacks!\n",
       "\n",
       "- **Anthropic Academy:** Want to learn how to build with Claude? Enter the Academy, where education and tech meet like peanut butter and jelly!\n",
       "\n",
       "## Conclusion\n",
       "Whether you're a potential customer itching to chat with Claude, an investor ready to secure the next big wave, or a superstar waiting to join our team, welcome aboard! \n",
       "\n",
       "With us at Anthropic, you're not just part of a company; you’re part of a revolution in AI—responsibly and safely, of course. \n",
       "\n",
       "So, what's it going to be—will you take the leap? 🤔 \n",
       "\n",
       "### Let's Chat with Claude! 💬✨\n"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Build the brochure for Anthropic; create_brochure renders it and returns the reply text.\n",
    "brochure = create_brochure(\"Anthropic\", \"https://anthropic.com\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 70,
   "id": "758ad58a",
   "metadata": {},
   "outputs": [],
   "source": [
    "import ollama\n",
    "MODEL = \"llama3.2\"\n",
    "\n",
    "translate_system_prompt = (\n",
    "    \"You are a native Spanish speaker who teaches English at a university. \"\n",
    "    \"Your goal is to translate from English to Spanish while preserving the Markdown format, emoji usage, and playful tone. \"\n",
    "    \"Keep the original structure exactly. Be creative, natural, and engaging for a Spanish-speaking reader.\"\n",
    ")\n",
    "\n",
    "def translate_user_prompt(brochure):\n",
    "    prompt = f\"\"\"You are looking at a company brochure:\n",
    "\n",
    "\\\"\\\"\\\"{brochure}\\\"\\\"\\\"\n",
    "\n",
    "Your goal is to translate this brochure into Spanish.\"\"\"\n",
    "    return prompt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "id": "93ca7f85",
   "metadata": {},
   "outputs": [],
   "source": [
    "def message(brochure):\n",
    "    \"\"\"Return the system and user chat messages for translating `brochure`.\"\"\"\n",
    "    system_turn = {'role': 'system', 'content': translate_system_prompt}\n",
    "    user_turn = {'role': 'user', 'content': translate_user_prompt(brochure)}\n",
    "    return [system_turn, user_turn]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "id": "3c06ec2e",
   "metadata": {},
   "outputs": [],
   "source": [
    "def translate(brochure):\n",
    "    \"\"\"\n",
    "    Translate the brochure with the Ollama model, render the result, and return it.\n",
    "\n",
    "    Args:\n",
    "        brochure: Brochure text to translate.\n",
    "\n",
    "    Returns:\n",
    "        The content of the model's reply.\n",
    "    \"\"\"\n",
    "    response = ollama.chat(model=MODEL, messages=message(brochure))\n",
    "    result = response['message']['content']\n",
    "    display(Markdown(result))\n",
    "    return result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "id": "26655743",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/markdown": [
       "# 🦾 ¡Bienvenidos a Anthropic: Donde la Inteligencia Artificial Conoce a la Aventura! 🚀\n",
       "\n",
       "## Sobre Nosotros\n",
       "En Anthropic, no solo creamos inteligencia artificial; creamos **Claude**! Es decir, no solo cualquier inteligencia artificial, sino la crème de la crème, escrita con un toque especial de seguridad. Claude no es solo inteligente; es un poeta (ver el soneto 3.7 de Claude!). Estamos emocionados de poner la humanidad en primer lugar y asegurarnos de que nuestra inteligencia artificial sepa que, independientemente de cuán inteligente sea, *estamos todavía en el cargo*!\n",
       "\n",
       "## Nuestra Cultura 🌍\n",
       "Imagina un lugar de trabajo donde las discusiones sobre la inteligencia artificial no son solo sobre quién tomará el control del mundo – sino sobre cómo podemos utilizar la inteligencia artificial para hacer la vida mejor. Tomamos pasos audaces pero también sabemos cuando es el momento de detenernos, reflexionar y asegurarnos de que no nos dejemos llevar por algo fuera de control. No tenemos una bálsamo mágico, pero sí un *inteligencia artificial muy buena* para eso!\n",
       "\n",
       "- **Transparencia:** Somos tan claros como los cielos sobre un escritorio recién aspirado.\n",
       "- **Colaboración:** ¡Es como Claude ayudándote a codificar! Nos ayudamos entre nosotros!\n",
       "- **Jardín intelectual:** Proporcionamos un espacio donde las ideas vuelan como confeti.\n",
       "\n",
       "## Clientes 🎉\n",
       "Desde desarrolladores astutos hasta educadores curiosos, y hasta grandes empresas intimidantes, todo el mundo habla con Claude! Nuestros clientes son una mezcla de mentes brillantes utilizando nuestra API para crear experiencias mágicas y herramientas que tal vez algún día no requieran un monitoreo humano (joking!). Aquí está lo que algunos de nuestros clientes están diciendo:\n",
       "\n",
       "> \"Claude es como un superhéroe de la comic book – luchando contra la injusticia informativa uno pregunta a la vez!\"\n",
       "> – Desarrollador satisfecho\n",
       "\n",
       "## Carreras: Únete a la Aventura! 💼\n",
       "¿Eres innovador, pensador o alguien que solo disfruta jugando ajedrez con algoritmos? En Anthropic, estamos siempre buscando personas talentosas y dispuestas a moldear el futuro de la inteligencia artificial. \n",
       "\n",
       "- **Roles Abiertos:** ¿Quieres ayudarnos a construir el futuro de inteligencia artificial segura? Tenemos roles disponibles, y sí, incluyen trabajar con Claude… y tal vez algunos snacks!\n",
       "\n",
       "- **Academia Anthropic:** ¿Quieres aprender a construir con Claude? ¡Incrímate en la Academia, donde la educación y la tecnología se unen como mantequilla y chocolate!\n",
       "\n",
       "## Conclusión\n",
       "¡Sea que eres cliente potencial deseando charlar con Claude, inversor listo para asegurar el próximo gran olvido, o estrella esperando para unirte a nuestro equipo, ¡biénvenidos a bordo! \n",
       "\n",
       "Con nosotros en Anthropic, no solo eres parte de una empresa; eres parte de una revolución en inteligencia artificial – responsable y segura, por supuesto.\n",
       "\n",
       "¡Así que qué va a ser—¿estás dispuesto a saltar la barrera? 🤔\n",
       "\n",
       "### ¡Habla con Claude! 💬✨\n",
       "\"\n",
       "\n",
       "Translation Notes:\n",
       "\n",
       "* The title was left as is, but could be translated to \"Welcome to Anthropic: Where Artificial Intelligence Meets Adventure\"\n",
       "* In the text, \"Claude\" was translated to \"un superhéroe de la comic book\", which was then changed back to \"un superhéroe de cómics\", to preserve the original tone and language.\n",
       "* The phrase \"crème de la crème\" was left as is, but could be translated to \"la mejor del mejor\" or \"la crema de la crema\".\n",
       "* In the section on culture, the phrase \"*discussions about AI*\" was translated to \"*discusiones sobre inteligencia artificial*\", to better fit the Spanish context.\n",
       "* The use of emojis in the original text was preserved, but some might be considered more common in English or other languages.\n",
       "* In the \"Customers\" section, the sentence \"*fighting information injustice one query at a time!* was translated to \"*luchando contra la injusticia informativa uno pregunta a la vez!*\", and the phrase \"*kidding!* was left as is.\n",
       "* The final line, \"¡Así que qué va a ser—¿estás dispuesto a saltar la barrera? 🤔\", maintains its playful tone while translating to \"¡Así que qué va a ser—¿estás dispuesto a cruzar el umbral?\""
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Translate the brochure produced above; translate() renders the result and returns it.\n",
    "translated_text = translate(brochure)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "417e75e2",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "llm-engineering",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.21"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
